1 Introduction

2 Packages

library(org.Hs.eg.db)
## Loading required package: AnnotationDbi
## Loading required package: stats4
## Loading required package: BiocGenerics
## Loading required package: parallel
## 
## Attaching package: 'BiocGenerics'
## The following objects are masked from 'package:parallel':
## 
##     clusterApply, clusterApplyLB, clusterCall, clusterEvalQ,
##     clusterExport, clusterMap, parApply, parCapply, parLapply,
##     parLapplyLB, parRapply, parSapply, parSapplyLB
## The following objects are masked from 'package:stats':
## 
##     IQR, mad, sd, var, xtabs
## The following objects are masked from 'package:base':
## 
##     anyDuplicated, append, as.data.frame, basename, cbind, colnames,
##     dirname, do.call, duplicated, eval, evalq, Filter, Find, get, grep,
##     grepl, intersect, is.unsorted, lapply, Map, mapply, match, mget,
##     order, paste, pmax, pmax.int, pmin, pmin.int, Position, rank,
##     rbind, Reduce, rownames, sapply, setdiff, sort, table, tapply,
##     union, unique, unsplit, which.max, which.min
## Loading required package: Biobase
## Welcome to Bioconductor
## 
##     Vignettes contain introductory material; view with
##     'browseVignettes()'. To cite Bioconductor, see
##     'citation("Biobase")', and for packages 'citation("pkgname")'.
## Loading required package: IRanges
## Loading required package: S4Vectors
## 
## Attaching package: 'S4Vectors'
## The following objects are masked from 'package:base':
## 
##     expand.grid, I, unname
## 
library(DESeq2)
## Loading required package: GenomicRanges
## Loading required package: GenomeInfoDb
## Loading required package: SummarizedExperiment
## Loading required package: MatrixGenerics
## Loading required package: matrixStats
## 
## Attaching package: 'matrixStats'
## The following objects are masked from 'package:Biobase':
## 
##     anyMissing, rowMedians
## 
## Attaching package: 'MatrixGenerics'
## The following objects are masked from 'package:matrixStats':
## 
##     colAlls, colAnyNAs, colAnys, colAvgsPerRowSet, colCollapse,
##     colCounts, colCummaxs, colCummins, colCumprods, colCumsums,
##     colDiffs, colIQRDiffs, colIQRs, colLogSumExps, colMadDiffs,
##     colMads, colMaxs, colMeans2, colMedians, colMins, colOrderStats,
##     colProds, colQuantiles, colRanges, colRanks, colSdDiffs, colSds,
##     colSums2, colTabulates, colVarDiffs, colVars, colWeightedMads,
##     colWeightedMeans, colWeightedMedians, colWeightedSds,
##     colWeightedVars, rowAlls, rowAnyNAs, rowAnys, rowAvgsPerColSet,
##     rowCollapse, rowCounts, rowCummaxs, rowCummins, rowCumprods,
##     rowCumsums, rowDiffs, rowIQRDiffs, rowIQRs, rowLogSumExps,
##     rowMadDiffs, rowMads, rowMaxs, rowMeans2, rowMedians, rowMins,
##     rowOrderStats, rowProds, rowQuantiles, rowRanges, rowRanks,
##     rowSdDiffs, rowSds, rowSums2, rowTabulates, rowVarDiffs, rowVars,
##     rowWeightedMads, rowWeightedMeans, rowWeightedMedians,
##     rowWeightedSds, rowWeightedVars
## The following object is masked from 'package:Biobase':
## 
##     rowMedians
library(readxl)
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5     ✓ purrr   0.3.4
## ✓ tibble  3.1.2     ✓ dplyr   1.0.7
## ✓ tidyr   1.1.3     ✓ stringr 1.4.0
## ✓ readr   1.4.0     ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::collapse()   masks IRanges::collapse()
## x dplyr::combine()    masks Biobase::combine(), BiocGenerics::combine()
## x dplyr::count()      masks matrixStats::count()
## x dplyr::desc()       masks IRanges::desc()
## x tidyr::expand()     masks S4Vectors::expand()
## x dplyr::filter()     masks stats::filter()
## x dplyr::first()      masks S4Vectors::first()
## x dplyr::lag()        masks stats::lag()
## x ggplot2::Position() masks BiocGenerics::Position(), base::Position()
## x purrr::reduce()     masks GenomicRanges::reduce(), IRanges::reduce()
## x dplyr::rename()     masks S4Vectors::rename()
## x dplyr::select()     masks AnnotationDbi::select()
## x dplyr::slice()      masks IRanges::slice()

3 Import data

raw_counts <- read_tsv("~/SURFdrive/Zbook_mappen/Cardio/Projects/Marian - EndoMT/in_vitro_bulk_counts/MW_combined_raw_counts.txt")
## 
## ── Column specification ────────────────────────────────────────────────────────
## cols(
##   .default = col_double(),
##   gene = col_character()
## )
## ℹ Use `spec()` for the full column specifications.
coldata <- read_xlsx("~/SURFdrive/Zbook_mappen/Cardio/Projects/Marian - EndoMT/in_vitro_bulk_counts/Sample overview EndoMT_01-04-21_Checked.xlsx")
## New names:
## * `` -> ...1

3.1 Tidy

# counts
raw_counts <- raw_counts %>% 
  rename_all(
    funs(
      str_replace_all(., ".sam.counts", "") # remove .sam.counts
    )
  ) %>% 
  select(-`15HCAECT6C`) %>% # -`5HCAECT2TGF`, t = 0 controls: -`1HCAECT0`, -`2HCAECT0`
  # select(-ends_with("mix")) %>%
  as.data.frame() %>% 
  column_to_rownames("gene")
## Warning: `funs()` was deprecated in dplyr 0.8.0.
## Please use a list of either functions or lambdas: 
## 
##   # Simple named list: 
##   list(mean = mean, median = median)
## 
##   # Auto named with `tibble::lst()`: 
##   tibble::lst(mean, median)
## 
##   # Using lambdas
##   list(~ mean(., trim = .2), ~ median(., na.rm = TRUE))
raw_counts
# coldata
coldata <- coldata %>% 
  rename_all(tolower) %>% 
  rename(sample_name = "sample name",
         cells_passage = "cells + passage", 
         time_point = "time point (h)",
         stimulus = stimuli,
         exp_number = "#",
         sample_available = 1) %>% 
  separate(cells_passage, into = c("cell_type", "passage")) %>% 
  mutate(across(.cols = stimulus, ~str_replace(., "10ng/ml ", ""))) %>% 
  mutate(across(.cols = stimulus, ~str_replace(., "\\+", "_"))) %>% 
  mutate_at(vars(stimulus, duplicate, time_point), list(as.factor)) %>% 
  mutate(file_name = paste(exp_number, sample_name, sep = "")) %>% 
  mutate(sample_available = str_replace_all(sample_available, c("no sample"= "No") ) ) %>% 
  mutate(sample_available = replace_na(sample_available, "Yes") )

coldata

3.2 Define coldata

coldata_from_data <- colnames(raw_counts) %>% 
  as.data.frame() %>% 
  rename(file_name = 1) 

3.3 Check colnames counts and rownames coldata

coldata <- coldata_from_data %>% 
  left_join(coldata, by = "file_name") %>% 
  mutate(stim_time = paste(stimulus, time_point, sep = "_")) %>% 
  mutate_at(vars(stim_time), list(as.factor)) %>% 
  as.data.frame() %>% 
  column_to_rownames("file_name")

coldata
all(rownames(coldata) %in% colnames(raw_counts))
## [1] TRUE

4 Construct DESeqDataSet

dds <- DESeqDataSetFromMatrix(countData = raw_counts,
                              colData = coldata,
                              design = ~ stim_time)
## converting counts to integer mode
dds
## class: DESeqDataSet 
## dim: 51248 47 
## metadata(1): version
## assays(1): counts
## rownames(51248): ENSG00000000003 ENSG00000000005 ... ENSG00000283124
##   ENSG00000283125
## rowData names(0):
## colnames(47): 1HCAECT0 2HCAECT0 ... 72HCAECT72TNF 74HCAECT72mix
## colData names(10): sample_available exp_number ... duplicate stim_time

5 Pre-filtering

nrow(dds)
## [1] 51248
keep <- rowSums(counts(dds)) >= 10
dds <- dds[keep, ]
nrow(dds)
## [1] 34155

6 Differential expression analysis

library("BiocParallel")
register(MulticoreParam(8))

dds <- DESeq(dds, parallel = TRUE)
## estimating size factors
## estimating dispersions
## gene-wise dispersion estimates: 8 workers
## mean-dispersion relationship
## final dispersion estimates, fitting model and testing: 8 workers
res <- results(dds, parallel = TRUE)
res
## log2 fold change (MLE): stim time TNFa 72 vs control 0 
## Wald test p-value: stim time TNFa 72 vs control 0 
## DataFrame with 34155 rows and 6 columns
##                  baseMean log2FoldChange     lfcSE      stat      pvalue
##                 <numeric>      <numeric> <numeric> <numeric>   <numeric>
## ENSG00000000003  157.1867      0.3116641  0.223008  1.397546 1.62250e-01
## ENSG00000000419   61.4354     -0.2596956  0.283686 -0.915432 3.59965e-01
## ENSG00000000457  217.3070      1.6888254  0.233650  7.228012 4.90114e-13
## ENSG00000000460   14.5627     -0.6296458  0.559339 -1.125695 2.60294e-01
## ENSG00000000971  114.7365      0.0380823  0.233682  0.162966 8.70545e-01
## ...                   ...            ...       ...       ...         ...
## ENSG00000283117  0.807028      -1.649717  2.586147 -0.637905    0.523535
## ENSG00000283120 73.054219      -0.438096  0.276508 -1.584388    0.113106
## ENSG00000283121  2.438151       0.395130  1.198042  0.329813    0.741541
## ENSG00000283122  0.734711      -1.185959  2.346772 -0.505357    0.613308
## ENSG00000283124  6.067954      -1.412803  0.884292 -1.597665    0.110118
##                        padj
##                   <numeric>
## ENSG00000000003 4.31903e-01
## ENSG00000000419 6.53447e-01
## ENSG00000000457 3.68833e-11
## ENSG00000000460 5.57268e-01
## ENSG00000000971 9.52769e-01
## ...                     ...
## ENSG00000283117          NA
## ENSG00000283120    0.346383
## ENSG00000283121          NA
## ENSG00000283122          NA
## ENSG00000283124    0.340474
resOrdered <- res[order(res$pvalue),]
summary(res)
## 
## out of 34155 with nonzero total read count
## adjusted p-value < 0.1
## LFC > 0 (up)       : 1753, 5.1%
## LFC < 0 (down)     : 1582, 4.6%
## outliers [1]       : 0, 0%
## low counts [2]     : 15893, 47%
## (mean count < 6)
## [1] see 'cooksCutoff' argument of ?results
## [2] see 'independentFiltering' argument of ?results

6.0.1 Comparisons

var_pairs <- crossing(unique(coldata$stimulus), unique(coldata$time_point) )  %>% 
  setNames(c("stimulus", "time_point"))

So, there are 28 different comparisons we can make based on combinations of stimulus and time_point. And although proper time series analysis is preferred, we lack a time point for TNF+TGF, and we lack conditions for t=0. Therefore, here we perform additional analyses assessing DEG between all possible combinations of stimuli and times t=2-72 points vs. t=0.

resultsnames <- resultsNames(dds)[-1]
resultsnames
##  [1] "stim_time_control_12_vs_control_0"   "stim_time_control_2_vs_control_0"   
##  [3] "stim_time_control_24_vs_control_0"   "stim_time_control_48_vs_control_0"  
##  [5] "stim_time_control_6_vs_control_0"    "stim_time_control_72_vs_control_0"  
##  [7] "stim_time_TGFb_12_vs_control_0"      "stim_time_TGFb_2_vs_control_0"      
##  [9] "stim_time_TGFb_24_vs_control_0"      "stim_time_TGFb_48_vs_control_0"     
## [11] "stim_time_TGFb_6_vs_control_0"       "stim_time_TGFb_72_vs_control_0"     
## [13] "stim_time_TGFb_TNFa_12_vs_control_0" "stim_time_TGFb_TNFa_24_vs_control_0"
## [15] "stim_time_TGFb_TNFa_48_vs_control_0" "stim_time_TGFb_TNFa_6_vs_control_0" 
## [17] "stim_time_TGFb_TNFa_72_vs_control_0" "stim_time_TNFa_12_vs_control_0"     
## [19] "stim_time_TNFa_2_vs_control_0"       "stim_time_TNFa_24_vs_control_0"     
## [21] "stim_time_TNFa_48_vs_control_0"      "stim_time_TNFa_6_vs_control_0"      
## [23] "stim_time_TNFa_72_vs_control_0"

Apply over results

results_applier <- function(name) {
  results(dds, name = name, parallel = TRUE)
}

results_applier("stim_time_control_12_vs_control_0")

res_list <- lapply(resultsnames, results_applier)
names(res_list) <- resultsnames

dir.create("deg_cond_vs_t0")
lapply(names(res_list), function(x) {
  res_list[[x]]$symbol <- mapIds(org.Hs.eg.db,
                    keys = rownames(res_list[[x]]),
                    column = "SYMBOL",    ## output type, and output column name
                    keytype = "ENSEMBL",  ## source type
                    multiVals = "first")
  as.data.frame(res_list[[x]]) %>% 
    rownames_to_column("ensembl") %>%
    arrange(padj) %>% 
    # write.table(file = paste(x, ".txt"), sep = "\t")
    write_tsv(file = paste("deg_cond_vs_t0/", x, ".txt", sep  = ""))
  # as.data.frame(res_list[[x]][order(res_list[[x]]$padj, decreasing = TRUE),])

  # write.table(res_list[[x]], file = paste(x, ".txt"), sep = "\t")
})

7 Data transformations and visualization

7.1 Transformed values

vsd <- vst(dds, blind = FALSE)
# rld <- rlog(dds, blind = FALSE)
# head(assay(rld), 3)
head(assay(vsd), 3)
##                 1HCAECT0 2HCAECT0 3HCAECT2C 5HCAECT2TGF 6HCAECT2TNF 9HCAECT2C
## ENSG00000000003 7.402413 7.502696  7.641769    6.453079    7.489241  7.531814
## ENSG00000000419 7.139459 6.998412  7.229363    6.453079    7.261347  7.160053
## ENSG00000000457 7.046826 6.986553  7.229363    7.765725    7.347882  6.970598
##                 11HCAECT2TGF 12HCAECT2TNF 17HCAECT6TGF 18HCAECT6TNF
## ENSG00000000003     7.441796     7.584642     7.714015     7.470884
## ENSG00000000419     7.294261     7.266196     6.626618     6.780364
## ENSG00000000457     7.110690     7.058394     8.009485     8.218843
##                 20HCAECT6mix 21HCAECT6C 23HCAECT6TGF 24HCAECT6TNF 26HCAECT6mix
## ENSG00000000003     7.472844   7.760185     7.837003     7.464133     7.593660
## ENSG00000000419     6.883526   6.752987     6.803655     6.796066     7.015502
## ENSG00000000457     8.035770   8.127435     7.926926     8.281502     8.202484
##                 27HCAECT12C 29HCAECT12TGF 30HCAECT12TNF 32HCAECT12mix
## ENSG00000000003    7.871116      7.902730      7.470539      7.597242
## ENSG00000000419    6.935952      6.815136      6.942202      6.886579
## ENSG00000000457    8.086142      7.883964      7.997837      7.977817
##                 33HCAECT12C 35HCAECT12TGF 36HCAECT12TNF 38HCAECT12mix
## ENSG00000000003    7.578819      7.745528      7.543506      7.621197
## ENSG00000000419    6.635392      6.650163      6.591485      6.796625
## ENSG00000000457    7.850008      7.720075      8.189120      8.003018
##                 39HCAECT24C 41HCAECT24TGF 42HCAECT24TNF 44HCAECT24mix
## ENSG00000000003    7.936548      7.944614      7.611673      7.953912
## ENSG00000000419    6.547906      6.609634      6.836577      6.548009
## ENSG00000000457    8.300628      8.382955      8.557898      8.514061
##                 45HCAECT24C 47HCAECT24TGF 48HCAECT24TNF 50HCAECT24mix
## ENSG00000000003    7.588246      8.123159      7.910187      7.760576
## ENSG00000000419    6.263966      6.708519      6.696089      6.172215
## ENSG00000000457    8.723532      8.586074      8.463991      8.777320
##                 51HCAECT48C 53HCAECT48TGF 54HCAECT48TNF 56HCAECT48mix
## ENSG00000000003    8.083877      8.097203      7.758529      8.011849
## ENSG00000000419    6.536622      6.770668      6.572209      6.822044
## ENSG00000000457    8.499073      8.239837      8.053674      8.021565
##                 57HCAECT48C 59HCAECT48TGF 60HCAECT48TNF 62HCAECT48mix
## ENSG00000000003    7.887411      8.268715      7.902580      8.041099
## ENSG00000000419    6.542875      6.830101      6.613715      6.937499
## ENSG00000000457    8.215768      7.913474      7.824737      8.036282
##                 63HCAECT72C 65HCAECT72TGF 66HCAECT72TNF 68HCAECT72mix
## ENSG00000000003    7.614220      7.834974      7.719266      7.580086
## ENSG00000000419    6.960903      6.698444      6.775310      7.123558
## ENSG00000000457    8.265512      8.490707      8.186225      8.227960
##                 69HCAECT72C 71HCAECT72TGF 72HCAECT72TNF 74HCAECT72mix
## ENSG00000000003    7.864152      7.773177      7.640519      7.602019
## ENSG00000000419    6.750837      6.607506      7.033390      6.862555
## ENSG00000000457    8.403682      8.183483      8.308603      8.203801

7.2 Effect of transformations on the variance

# this gives log2(n + 1)
ntd <- normTransform(dds)
library(vsn)
meanSdPlot(assay(ntd))

meanSdPlot(assay(vsd))

# meanSdPlot(assay(rld))

7.3 Heatmap of the count matrix

dds <- estimateSizeFactors(dds)

library(pheatmap)
select <- order(rowMeans(counts(dds, normalized = TRUE)),
                decreasing = TRUE)[1:20]
df <- as.data.frame(colData(dds)[, c("stimulus", "duplicate")])
pheatmap(assay(ntd)[select,], cluster_rows = FALSE, show_rownames = FALSE,
         cluster_cols = FALSE, annotation_col = df)

pheatmap(assay(vsd)[select,], cluster_rows=FALSE, show_rownames=FALSE,
         cluster_cols=FALSE, annotation_col=df)

# pheatmap(assay(rld)[select,], cluster_rows=FALSE, show_rownames=FALSE,
#          cluster_cols=FALSE, annotation_col=df)

7.4 Heatmap of the sample-to-sample distances

sampleDists <- dist(t(assay(vsd)))

library("RColorBrewer")
sampleDistMatrix <- as.matrix(sampleDists)
rownames(sampleDistMatrix) <- paste(vsd$condition, vsd$type, sep = "-")
# colnames(sampleDistMatrix) <- NULL
colors <- colorRampPalette( rev(brewer.pal(9, "Blues")) )(255)
pheatmap(sampleDistMatrix,
         clustering_distance_rows = sampleDists,
         clustering_distance_cols = sampleDists,
         col = colors)

8 Identify count outliers

par(mar = c(8,5,2,2))
boxplot(log10(assays(dds)[["cooks"]]), range=0, las=2)

9 Dispersion plot

plotDispEsts(dds)

9.1 Principal component plot of the samples

pcaData <- plotPCA(vsd, intgroup = c("stimulus", "time_point"), returnData = TRUE)
percentVar <- round(100 * attr(pcaData, "percentVar"))
ggplot(pcaData, aes(PC1, PC2, color = stimulus, shape = time_point)) +
  geom_point(size = 3) +
  scale_shape_manual(values = c(16,17,15,18,3,9,8)) +
  xlab(paste0("PC1: ",percentVar[1],"% variance")) +
  ylab(paste0("PC2: ",percentVar[2],"% variance")) + 
  coord_fixed()

9.2 PCATools PCA

library(PCAtools)
## Loading required package: ggrepel
## 
## Attaching package: 'PCAtools'
## The following objects are masked from 'package:stats':
## 
##     biplot, screeplot
library(org.Hs.eg.db)
vsd_expr <- assay(vsd) %>% 
  as.data.frame()

# keytypes(org.Hs.eg.db)

# replace ensembl id with symbol
vsd_expr$symbol <- mapIds(org.Hs.eg.db,
                    keys = rownames(vsd_expr),
                    column = "SYMBOL",
                    keytype = "ENSEMBL",
                    multiVals = "first")
## 'select()' returned 1:many mapping between keys and columns
nrow(vsd_expr)
## [1] 34155
vsd_expr <- vsd_expr %>% 
  drop_na(symbol) %>% 
  distinct(symbol, .keep_all = TRUE) %>% 
  remove_rownames() %>% 
  column_to_rownames("symbol")

nrow(vsd_expr)
## [1] 21809
p <- pca(vsd_expr, metadata = colData(dds), removeVar = 0.90) # run PCA and remove lowest 10% variability
## -- removing the lower 90% of variables based on variance
# 0.97 600+ genes
# 0.9 2000+ genes
horn <- parallelPCA(vsd_expr)
## Warning in check_numbers(x, k = k, nu = nu, nv = nv): more singular values/
## vectors requested than available

## Warning in check_numbers(x, k = k, nu = nu, nv = nv): more singular values/
## vectors requested than available

## Warning in check_numbers(x, k = k, nu = nu, nv = nv): more singular values/
## vectors requested than available

## Warning in check_numbers(x, k = k, nu = nu, nv = nv): more singular values/
## vectors requested than available

## Warning in check_numbers(x, k = k, nu = nu, nv = nv): more singular values/
## vectors requested than available

## Warning in check_numbers(x, k = k, nu = nu, nv = nv): more singular values/
## vectors requested than available

## Warning in check_numbers(x, k = k, nu = nu, nv = nv): more singular values/
## vectors requested than available

## Warning in check_numbers(x, k = k, nu = nu, nv = nv): more singular values/
## vectors requested than available

## Warning in check_numbers(x, k = k, nu = nu, nv = nv): more singular values/
## vectors requested than available

## Warning in check_numbers(x, k = k, nu = nu, nv = nv): more singular values/
## vectors requested than available

## Warning in check_numbers(x, k = k, nu = nu, nv = nv): more singular values/
## vectors requested than available

## Warning in check_numbers(x, k = k, nu = nu, nv = nv): more singular values/
## vectors requested than available

## Warning in check_numbers(x, k = k, nu = nu, nv = nv): more singular values/
## vectors requested than available

## Warning in check_numbers(x, k = k, nu = nu, nv = nv): more singular values/
## vectors requested than available

## Warning in check_numbers(x, k = k, nu = nu, nv = nv): more singular values/
## vectors requested than available

## Warning in check_numbers(x, k = k, nu = nu, nv = nv): more singular values/
## vectors requested than available

## Warning in check_numbers(x, k = k, nu = nu, nv = nv): more singular values/
## vectors requested than available

## Warning in check_numbers(x, k = k, nu = nu, nv = nv): more singular values/
## vectors requested than available

## Warning in check_numbers(x, k = k, nu = nu, nv = nv): more singular values/
## vectors requested than available

## Warning in check_numbers(x, k = k, nu = nu, nv = nv): more singular values/
## vectors requested than available

## Warning in check_numbers(x, k = k, nu = nu, nv = nv): more singular values/
## vectors requested than available

## Warning in check_numbers(x, k = k, nu = nu, nv = nv): more singular values/
## vectors requested than available

## Warning in check_numbers(x, k = k, nu = nu, nv = nv): more singular values/
## vectors requested than available

## Warning in check_numbers(x, k = k, nu = nu, nv = nv): more singular values/
## vectors requested than available

## Warning in check_numbers(x, k = k, nu = nu, nv = nv): more singular values/
## vectors requested than available

## Warning in check_numbers(x, k = k, nu = nu, nv = nv): more singular values/
## vectors requested than available

## Warning in check_numbers(x, k = k, nu = nu, nv = nv): more singular values/
## vectors requested than available

## Warning in check_numbers(x, k = k, nu = nu, nv = nv): more singular values/
## vectors requested than available

## Warning in check_numbers(x, k = k, nu = nu, nv = nv): more singular values/
## vectors requested than available

## Warning in check_numbers(x, k = k, nu = nu, nv = nv): more singular values/
## vectors requested than available

## Warning in check_numbers(x, k = k, nu = nu, nv = nv): more singular values/
## vectors requested than available

## Warning in check_numbers(x, k = k, nu = nu, nv = nv): more singular values/
## vectors requested than available

## Warning in check_numbers(x, k = k, nu = nu, nv = nv): more singular values/
## vectors requested than available

## Warning in check_numbers(x, k = k, nu = nu, nv = nv): more singular values/
## vectors requested than available

## Warning in check_numbers(x, k = k, nu = nu, nv = nv): more singular values/
## vectors requested than available

## Warning in check_numbers(x, k = k, nu = nu, nv = nv): more singular values/
## vectors requested than available

## Warning in check_numbers(x, k = k, nu = nu, nv = nv): more singular values/
## vectors requested than available

## Warning in check_numbers(x, k = k, nu = nu, nv = nv): more singular values/
## vectors requested than available

## Warning in check_numbers(x, k = k, nu = nu, nv = nv): more singular values/
## vectors requested than available

## Warning in check_numbers(x, k = k, nu = nu, nv = nv): more singular values/
## vectors requested than available

## Warning in check_numbers(x, k = k, nu = nu, nv = nv): more singular values/
## vectors requested than available

## Warning in check_numbers(x, k = k, nu = nu, nv = nv): more singular values/
## vectors requested than available

## Warning in check_numbers(x, k = k, nu = nu, nv = nv): more singular values/
## vectors requested than available

## Warning in check_numbers(x, k = k, nu = nu, nv = nv): more singular values/
## vectors requested than available

## Warning in check_numbers(x, k = k, nu = nu, nv = nv): more singular values/
## vectors requested than available

## Warning in check_numbers(x, k = k, nu = nu, nv = nv): more singular values/
## vectors requested than available

## Warning in check_numbers(x, k = k, nu = nu, nv = nv): more singular values/
## vectors requested than available

## Warning in check_numbers(x, k = k, nu = nu, nv = nv): more singular values/
## vectors requested than available

## Warning in check_numbers(x, k = k, nu = nu, nv = nv): more singular values/
## vectors requested than available

## Warning in check_numbers(x, k = k, nu = nu, nv = nv): more singular values/
## vectors requested than available

## Warning in check_numbers(x, k = k, nu = nu, nv = nv): more singular values/
## vectors requested than available
horn$n
## [1] 7
elbow <- findElbowPoint(p$variance)
elbow
## PC7 
##   7
# Screeplot
screeplot(p,
    components = getComponents(p, 1:20),
    vline = c(horn$n, elbow)) +

    geom_label(aes(x = horn$n + 1, y = 50,
      label = 'Horn\'s', vjust = -1, size = 8)) +
    geom_label(aes(x = elbow + 1, y = 50,
      label = 'Elbow method', vjust = -1, size = 8))

which(cumsum(p$variance) > 80)[1]
## PC6 
##   6
# Biplot
biplot(p, showLoadings = FALSE,
       lab = NULL,
       labSize = 5, 
       pointSize = 5, 
       sizeLoadingsNames = 5,
       colby = "stimulus",
       shape = 'time_point', shapekey = c("0" = 16, "2" = 17, "6" = 15,
                                          "12" = 18, "24" = 3, "48" = 9, "72" = 8),
       legendPosition = 'left', legendLabSize = 14, legendIconSize = 8.0,
       caption = '5 PCs ≈ 80%'
       )

# scale_shape_manual(values = c(16,17,15,18,3,9,8))
# Biplot
biplot(p, 
       showLoadings = TRUE,
       ntopLoadings = 15,
       lab = NULL,
       labSize = 5, 
       pointSize = 5, 
       sizeLoadingsNames = 4,
       colby = "stimulus",
       shape = 'time_point', shapekey = c("0" = 16, "2" = 17, "6" = 15,
                                          "12" = 18, "24" = 3, "48" = 9, "72" = 8),
       legendPosition = 'left', legendLabSize = 14, legendIconSize = 8.0,
       caption = '5 PCs ≈ 80%'
       )

# biplot(p, showLoadings = TRUE,
#     labSize = 5, pointSize = 5, sizeLoadingsNames = 5)

9.3 Pairs plot - coloured by stimulus

pairsplot(p,
          colby = "stimulus",
          hline = 0, vline = 0,
          pointSize = 2,
          gridlines.major = FALSE, gridlines.minor = FALSE,
          title = 'Pairs plot', plotaxes = FALSE,
          margingaps = unit(c(-0.01, -0.01, -0.01, -0.01), 'cm'),
          shape = 'time_point', shapekey = c("0" = 16, "2" = 17, "6" = 15,
                                          "12" = 18, "24" = 3, "48" = 9, "72" = 8))
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.

9.4 Pairs plot - coloured by time point

Single plot example, including legend:

pairsplot(p,
          components = getComponents(p, seq_len(2)),
          colby = "time_point",
          legendPosition = "left",
          hline = 0, vline = 0,
          gridlines.major = FALSE, gridlines.minor = FALSE)
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.

pairsplot(p,
          colby = "time_point",
          hline = 0, vline = 0,
          pointSize = 2,
          gridlines.major = FALSE, gridlines.minor = FALSE,
          title = 'Pairs plot', plotaxes = FALSE,
          margingaps = unit(c(-0.01, -0.01, -0.01, -0.01), 'cm'))
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.
## Coordinate system already present. Adding new coordinate system, which will replace the existing one.

plotloadings(p, labSize = 3, rangeRetain = 0.05)
## -- variables retained:
## IFI6, MX1, CST1, IL1RL1, CCL14, MMRN1, TAGLN, SELE, VCAM1, CXCL8, IFIT1, C5orf63, OAS1, APLN, MKI67, TOP2A, LINC00506, LINC00504, UGDH-AS1, ORC4, ASTN2, FRK

  eigencorplot(p,
    metavars = c("stimulus",
                 "time_point",
                 "stim_time"),
    components = getComponents(p, seq_len(5)))
## Warning in eigencorplot(p, metavars = c("stimulus", "time_point",
## "stim_time"), : stimulus is not numeric - please check the source data as non-
## numeric variables will be coerced to numeric
## Warning in eigencorplot(p, metavars = c("stimulus", "time_point",
## "stim_time"), : time_point is not numeric - please check the source data as non-
## numeric variables will be coerced to numeric
## Warning in eigencorplot(p, metavars = c("stimulus", "time_point",
## "stim_time"), : stim_time is not numeric - please check the source data as non-
## numeric variables will be coerced to numeric

  eigencorplot(p,
    metavars = c("stimulus",
                 "time_point",
                 "stim_time"),
    components = getComponents(p, seq_len(5)), 
    col = c('white', 'cornsilk1', 'gold', 'forestgreen', 'darkgreen'),
    cexCorval = 1.2,
    fontCorval = 2,
    posLab = 'all',
    rotLabX = 45,
    scale = TRUE,
    main = bquote(PC ~ Spearman ~ r^2 ~ clinical ~ correlates),
    plotRsquared = TRUE,
    corFUN = 'spearman',
    corUSE = 'pairwise.complete.obs',
    corMultipleTestCorrection = 'BH',
    signifSymbols = c('****', '***', '**', '*', ''),
    signifCutpoints = c(0, 0.0001, 0.001, 0.01, 0.05, 1))
## Warning in eigencorplot(p, metavars = c("stimulus", "time_point",
## "stim_time"), : stimulus is not numeric - please check the source data as non-
## numeric variables will be coerced to numeric
## Warning in eigencorplot(p, metavars = c("stimulus", "time_point",
## "stim_time"), : time_point is not numeric - please check the source data as non-
## numeric variables will be coerced to numeric
## Warning in eigencorplot(p, metavars = c("stimulus", "time_point",
## "stim_time"), : stim_time is not numeric - please check the source data as non-
## numeric variables will be coerced to numeric
## Warning in cor.test.default(xvals[, i], yvals[, j], use = corUSE, method =
## corFUN): Cannot compute exact p-value with ties

## Warning in cor.test.default(xvals[, i], yvals[, j], use = corUSE, method =
## corFUN): Cannot compute exact p-value with ties

## Warning in cor.test.default(xvals[, i], yvals[, j], use = corUSE, method =
## corFUN): Cannot compute exact p-value with ties

## Warning in cor.test.default(xvals[, i], yvals[, j], use = corUSE, method =
## corFUN): Cannot compute exact p-value with ties

## Warning in cor.test.default(xvals[, i], yvals[, j], use = corUSE, method =
## corFUN): Cannot compute exact p-value with ties

## Warning in cor.test.default(xvals[, i], yvals[, j], use = corUSE, method =
## corFUN): Cannot compute exact p-value with ties

## Warning in cor.test.default(xvals[, i], yvals[, j], use = corUSE, method =
## corFUN): Cannot compute exact p-value with ties

## Warning in cor.test.default(xvals[, i], yvals[, j], use = corUSE, method =
## corFUN): Cannot compute exact p-value with ties

## Warning in cor.test.default(xvals[, i], yvals[, j], use = corUSE, method =
## corFUN): Cannot compute exact p-value with ties

## Warning in cor.test.default(xvals[, i], yvals[, j], use = corUSE, method =
## corFUN): Cannot compute exact p-value with ties

## Warning in cor.test.default(xvals[, i], yvals[, j], use = corUSE, method =
## corFUN): Cannot compute exact p-value with ties

## Warning in cor.test.default(xvals[, i], yvals[, j], use = corUSE, method =
## corFUN): Cannot compute exact p-value with ties

## Warning in cor.test.default(xvals[, i], yvals[, j], use = corUSE, method =
## corFUN): Cannot compute exact p-value with ties

## Warning in cor.test.default(xvals[, i], yvals[, j], use = corUSE, method =
## corFUN): Cannot compute exact p-value with ties

## Warning in cor.test.default(xvals[, i], yvals[, j], use = corUSE, method =
## corFUN): Cannot compute exact p-value with ties

pcdata <- p$rotated
metadata <- p$metadata %>% 
  as.data.frame()

all(rownames(pcdata) == rownames(metadata))
## [1] TRUE
corr_plot_data <- pcdata %>% 
  # select(1:5) %>% 
  cbind(metadata)

corr_plot_data <- corr_plot_data %>% 
  select(PC1:PC5, time_point, stimulus, stim_time)

# library(GGally)
# ggpairs(corr_plot_data)
corr_plot_data %>%
  pivot_longer(cols = PC1:PC5, names_to = "PC", values_to = "PC_values") %>% 
  ggplot(aes(x = time_point, y = PC_values)) +
  geom_boxplot(aes(fill = time_point)) +
  facet_grid(. ~ PC)

corr_plot_data %>%
  pivot_longer(cols = PC1:PC5, names_to = "PC", values_to = "PC_values") %>% 
  ggplot(aes(x = stimulus, y = PC_values)) +
  geom_boxplot(aes(fill = stimulus)) +
  facet_grid(. ~ PC) +
  theme(axis.text.x = element_text(size=14, angle=45, hjust=1, vjust=1))

corr_plot_data %>%
  pivot_longer(cols = PC1:PC5, names_to = "PC", values_to = "PC_values") %>% 
  ggplot(aes(x = time_point, y = PC_values)) +
  geom_boxplot(aes(fill = time_point)) +
  facet_grid(stimulus ~ PC)

corr_plot_data %>%
  pivot_longer(cols = PC1:PC5, names_to = "PC", values_to = "PC_values") %>% 
  ggplot(aes(x = stimulus, y = PC_values)) +
  geom_boxplot(aes(fill = stimulus)) +
  facet_grid(time_point ~ PC) +
  theme(axis.text.x = element_text(size=14, angle=45, hjust=1, vjust=1))

10 top 30 genes per PC

pc_loadings <- p$loadings %>% 
  rownames_to_column("symbol") %>% 
  select(symbol, everything())

# pc1_genes <- pc_loadings %>% 
#   arrange(desc(abs(PC1) ) ) %>% 
#   slice(1:30) %>% 
#   pull(symbol)

# pull_top_pc_genes <- function(x) {
#   x <- enquo(x)
#   
#   pc_loadings %>% 
#   arrange(desc(abs(!!x) ) ) %>% 
#   slice(1:30) %>% 
#   pull(symbol)
# }

dir.create("pc_genes")
## Warning in dir.create("pc_genes"): 'pc_genes' already exists
# top_pc_genes_save <- function(x) {
#   x <- enquo(x)
#   
#   pc_loadings %>% 
#   arrange(desc(abs(!!x) ) ) %>% 
#   slice(1:30) %>% 
#   select(symbol, !!x)
# }


top_plus_pc_genes_save <- function(x) {
  x <- enquo(x)
  
  pc_loadings %>% 
  arrange(desc(!!x) ) %>% 
  slice(1:30) %>% 
  select(symbol, !!x)
}

top_minus_pc_genes_save <- function(x) {
  x <- enquo(x)
  
  pc_loadings %>% 
  arrange(!!x) %>% 
  slice(1:30) %>% 
  select(symbol, !!x)
}

# PC1
pc1_genes <- top_plus_pc_genes_save(PC1)

pc1_genes %>%
  write_tsv(file = "pc_genes/pc1_plus_genes.txt")

pc1_genes <- top_minus_pc_genes_save(PC1)

pc1_genes %>%
  write_tsv(file = "pc_genes/pc1_minus_genes.txt")

# PC2
pc2_genes <- top_plus_pc_genes_save(PC2)

pc2_genes %>%
  write_tsv(file = "pc_genes/pc2_plus_genes.txt")

pc2_genes <- top_minus_pc_genes_save(PC2)

pc2_genes %>%
  write_tsv(file = "pc_genes/pc2_minus_genes.txt")

# PC3
pc3_genes <- top_plus_pc_genes_save(PC3)

pc3_genes %>%
  write_tsv(file = "pc_genes/pc3_plus_genes.txt")

pc3_genes <- top_minus_pc_genes_save(PC3)

pc3_genes %>%
  write_tsv(file = "pc_genes/pc3_minus_genes.txt")

# PC4
pc4_genes <- top_plus_pc_genes_save(PC4)

pc4_genes %>%
  write_tsv(file = "pc_genes/pc4_plus_genes.txt")

pc4_genes <- top_minus_pc_genes_save(PC4)

pc4_genes %>%
  write_tsv(file = "pc_genes/pc4_minus_genes.txt")

# PC5
pc5_genes <- top_plus_pc_genes_save(PC5)

pc5_genes %>%
  write_tsv(file = "pc_genes/pc5_plus_genes.txt")

pc5_genes <- top_minus_pc_genes_save(PC5)

pc5_genes %>%
  write_tsv(file = "pc_genes/pc5_minus_genes.txt")


# pc1_genes <- top_pc_genes_save(PC1)
# 
# pc1_genes %>% 
#   write_tsv(file = "pc_genes/pc1_genes.txt")
# 
# pc2_genes <- top_pc_genes_save(PC2)
# 
# pc2_genes %>% 
#   write_tsv(file = "pc_genes/pc2_genes.txt")
# 
# pc3_genes <- top_pc_genes_save(PC3)
# 
# pc3_genes %>% 
#   write_tsv(file = "pc_genes/pc3_genes.txt")
# 
# pc4_genes <- top_pc_genes_save(PC4)
# 
# pc4_genes %>% 
#   write_tsv(file = "pc_genes/pc4_genes.txt")
# 
# pc5_genes <- top_pc_genes_save(PC5)
# 
# pc5_genes %>% 
#   write_tsv(file = "pc_genes/pc5_genes.txt")

11 Expression of EMT genes and apoptosis genes

Often, a list of standard EMT genes are measured by PCR. Since we have RAN-seq data of samples, we can simply look at the expression of these same genes. ### Tidy bulk data From here, extract the gene expression values, plus gene identifier, annotate with gene symbol, and select the genes of our interest.

# Import EMT genes
emt_pcr_genes <- c("VIM",
                   "ACTA2",
                   "TAGLN",
                   "CDH5",
                   "CDH2",
                   "FSP1",
                   "VWF",
                   "CD31",
                   "FN1")

# Import apoptosis genes
# apoptosis_genes
apoptosis_genes <- read_xlsx("Apoptosis_geneset_06152021.xlsx", skip = 2, col_names = "symbol") %>% 
  pull(symbol)

# Import TF
emt_tf_sel <- c("ZEB1", "YAP1", "WWTR1", "PRRX1", "KLF4", "ERG")

# Expression data
expression_data <- assay(vsd) %>% 
  as.data.frame()


# extract expression values from vsd, including ensembl names
# expression_data <- as_tibble(data.frame(gene_ensembl = rowRanges(vsd)$feature_id, assay(vsd))) %>%
#      mutate_at(vars(c("gene_ensembl")), list(as.character)) ## gene_ensembl needs to be character for annotation to work

# annotations
# gene symbol - via org.Hs.eg.db
# columns(org.Hs.eg.db)
expression_data$symbol <- mapIds(org.Hs.eg.db,
                    keys = rownames(expression_data),
                    column = "SYMBOL",
                    keytype = "ENSEMBL",
                    multiVals = "first")
## 'select()' returned 1:many mapping between keys and columns
# tidy and subset
expression_data_sel <- expression_data %>%
     dplyr::select(symbol, everything()) %>%
     # filter(symbol == "APOE" | symbol == "TRIB3") %>% # example: filter APOE and TRIB3
     filter(symbol %in% c(emt_pcr_genes, apoptosis_genes, emt_tf_sel) )

head(expression_data_sel)[1:10]
# # tidy and subset non-selected genes
# set.seed(1000)
# expression_data_sample <- expression_data %>%
#      dplyr::select(gene_ensembl, symbol, everything()) %>%
#      sample_n(1000) %>% 
#      unite(symbol_ensembl, symbol, gene_ensembl, sep = "_", remove = FALSE)
# 
# expression_data_sample_mean <- expression_data_sample %>%
#   select_if(is.numeric) %>%
#   colMeans() %>% 
#   as_tibble(rownames = "study_number") %>% 
#   dplyr::rename(expression_value_sample = value)

Furthermore, the expression_data_sel df was gathered into a long form df for annotation with symptoms variables from vsd object, and later visualization and statistics.

# gather expression_data_sel df into long df form for annotation, plotting and statistics
expression_long <- expression_data_sel %>% 
  pivot_longer(cols = -symbol, names_to = "exp_sample_name", values_to = "expression_value")

# Annotate with clinical variables
expression_long <- expression_long %>% 
  left_join(metadata %>% 
              rownames_to_column("exp_sample_name"),
            by = "exp_sample_name" )

# expression_long %>% 
#   write_tsv("emt_tf_vector_expression.txt")

Expression of EMT genes in bulk RNA-seq, per time point and per stimulus

# define 4 colours
library(feather)
library(ggpubr)
qual_col4 <- get_pal("rose_crowned_fruit_dove")[c(1, 2, 5, 8)]
qual_col7 <- get_pal("bee_eater")
# qual_col7 <- get_pal("cassowary")[1:7]

p2 <- expression_long %>% 
  filter(symbol %in% emt_pcr_genes) %>% 
  ggplot(aes(x = time_point, y = expression_value, colour = time_point)) +
         geom_boxplot() +
  geom_jitter(size = 0.4, alpha = 0.5, position = position_jitterdodge(dodge.width = 0.8)) +
  xlab("Gene") +
  ylab("Expression value") +
  theme_bw() +
  # theme(axis.text.x = element_text(size = 14, angle = 45, hjust = 1, vjust = 1)) + # change orientation of x-axis labels
  scale_fill_manual(values = qual_col7) +
  scale_colour_manual(values = qual_col7) +
  facet_grid(symbol ~ stimulus, scales = "free_y")
p2

Expression of apoptosis genes in bulk RNA-seq, per time point and per stimulus

p3 <- expression_long %>% 
  filter(symbol %in% apoptosis_genes) %>% 
  ggplot(aes(x = time_point, y = expression_value, colour = time_point)) +
         geom_boxplot() +
  geom_jitter(size = 0.4, alpha = 0.5, position = position_jitterdodge(dodge.width = 0.8)) +
  xlab("Gene") +
  ylab("Expression value") +
  theme_bw() +
  # theme(axis.text.x = element_text(size = 14, angle = 45, hjust = 1, vjust = 1)) + # change orientation of x-axis labels
  scale_fill_manual(values = qual_col7) +
  scale_colour_manual(values = qual_col7) +
  facet_grid(symbol ~ stimulus, scales = "free_y")
p3

Expression of hand-picked TF in bulk RNA-seq, per time point and per stimulus

p4 <- expression_long %>% 
  filter(symbol %in% emt_tf_sel) %>% 
  ggplot(aes(x = time_point, y = expression_value, colour = time_point)) +
         geom_boxplot() +
  geom_jitter(size = 0.4, alpha = 0.5, position = position_jitterdodge(dodge.width = 0.8)) +
  xlab("Gene") +
  ylab("Expression value") +
  theme_bw() +
  # theme(axis.text.x = element_text(size = 14, angle = 45, hjust = 1, vjust = 1)) + # change orientation of x-axis labels
  scale_fill_manual(values = qual_col7) +
  scale_colour_manual(values = qual_col7) +
  facet_grid(symbol~stimulus, scales = "free_y")
p4

12 Session information

sessionInfo()
## R version 4.1.0 (2021-05-18)
## Platform: x86_64-pc-linux-gnu (64-bit)
## Running under: Ubuntu 18.04.5 LTS
## 
## Matrix products: default
## BLAS:   /usr/lib/x86_64-linux-gnu/openblas/libblas.so.3
## LAPACK: /usr/lib/x86_64-linux-gnu/libopenblasp-r0.2.20.so
## 
## locale:
##  [1] LC_CTYPE=en_US.UTF-8       LC_NUMERIC=C              
##  [3] LC_TIME=nl_NL.UTF-8        LC_COLLATE=en_US.UTF-8    
##  [5] LC_MONETARY=nl_NL.UTF-8    LC_MESSAGES=en_US.UTF-8   
##  [7] LC_PAPER=nl_NL.UTF-8       LC_NAME=C                 
##  [9] LC_ADDRESS=C               LC_TELEPHONE=C            
## [11] LC_MEASUREMENT=nl_NL.UTF-8 LC_IDENTIFICATION=C       
## 
## attached base packages:
## [1] parallel  stats4    stats     graphics  grDevices utils     datasets 
## [8] methods   base     
## 
## other attached packages:
##  [1] ggpubr_0.4.0                feather_0.0.0.9000         
##  [3] PCAtools_2.4.0              ggrepel_0.9.1              
##  [5] RColorBrewer_1.1-2          pheatmap_1.0.12            
##  [7] vsn_3.60.0                  forcats_0.5.1              
##  [9] stringr_1.4.0               dplyr_1.0.7                
## [11] purrr_0.3.4                 readr_1.4.0                
## [13] tidyr_1.1.3                 tibble_3.1.2               
## [15] ggplot2_3.3.5               tidyverse_1.3.1            
## [17] readxl_1.3.1                DESeq2_1.32.0              
## [19] SummarizedExperiment_1.22.0 MatrixGenerics_1.4.0       
## [21] matrixStats_0.59.0          GenomicRanges_1.44.0       
## [23] GenomeInfoDb_1.28.0         org.Hs.eg.db_3.13.0        
## [25] AnnotationDbi_1.54.1        IRanges_2.26.0             
## [27] S4Vectors_0.30.0            Biobase_2.52.0             
## [29] BiocGenerics_0.38.0        
## 
## loaded via a namespace (and not attached):
##   [1] colorspace_2.0-2          ggsignif_0.6.2           
##   [3] rio_0.5.27                ellipsis_0.3.2           
##   [5] XVector_0.32.0            fs_1.5.0                 
##   [7] rstudioapi_0.13           farver_2.1.0             
##   [9] hexbin_1.28.2             affyio_1.62.0            
##  [11] bit64_4.0.5               fansi_0.5.0              
##  [13] lubridate_1.7.10          xml2_1.3.2               
##  [15] sparseMatrixStats_1.4.0   splines_4.1.0            
##  [17] cachem_1.0.5              geneplotter_1.70.0       
##  [19] knitr_1.33                jsonlite_1.7.2           
##  [21] broom_0.7.8               annotate_1.70.0          
##  [23] dbplyr_2.1.1              png_0.1-7                
##  [25] BiocManager_1.30.16       compiler_4.1.0           
##  [27] httr_1.4.2                dqrng_0.3.0              
##  [29] backports_1.2.1           assertthat_0.2.1         
##  [31] Matrix_1.3-4              fastmap_1.1.0            
##  [33] limma_3.48.1              cli_2.5.0                
##  [35] BiocSingular_1.8.1        htmltools_0.5.1.1        
##  [37] tools_4.1.0               rsvd_1.0.5               
##  [39] gtable_0.3.0              glue_1.4.2               
##  [41] GenomeInfoDbData_1.2.6    affy_1.70.0              
##  [43] reshape2_1.4.4            Rcpp_1.0.6               
##  [45] carData_3.0-4             cellranger_1.1.0         
##  [47] vctrs_0.3.8               Biostrings_2.60.1        
##  [49] preprocessCore_1.54.0     DelayedMatrixStats_1.14.0
##  [51] xfun_0.24                 openxlsx_4.2.4           
##  [53] beachmat_2.8.0            rvest_1.0.0              
##  [55] irlba_2.3.3               lifecycle_1.0.0          
##  [57] rstatix_0.7.0             XML_3.99-0.6             
##  [59] zlibbioc_1.38.0           scales_1.1.1             
##  [61] hms_1.1.0                 curl_4.3.2               
##  [63] yaml_2.2.1                memoise_2.0.0            
##  [65] stringi_1.6.2             RSQLite_2.2.7            
##  [67] highr_0.9                 genefilter_1.74.0        
##  [69] ScaledMatrix_1.0.0        zip_2.2.0                
##  [71] BiocParallel_1.26.0       rlang_0.4.11             
##  [73] pkgconfig_2.0.3           bitops_1.0-7             
##  [75] evaluate_0.14             lattice_0.20-44          
##  [77] labeling_0.4.2            cowplot_1.1.1            
##  [79] bit_4.0.4                 tidyselect_1.1.1         
##  [81] plyr_1.8.6                magrittr_2.0.1           
##  [83] R6_2.5.0                  generics_0.1.0           
##  [85] DelayedArray_0.18.0       DBI_1.1.1                
##  [87] foreign_0.8-81            pillar_1.6.1             
##  [89] haven_2.4.1               withr_2.4.2              
##  [91] abind_1.4-5               survival_3.2-11          
##  [93] KEGGREST_1.32.0           RCurl_1.98-1.3           
##  [95] car_3.0-11                modelr_0.1.8             
##  [97] crayon_1.4.1              utf8_1.2.1               
##  [99] rmarkdown_2.9             locfit_1.5-9.4           
## [101] grid_4.1.0                data.table_1.14.0        
## [103] blob_1.2.1                reprex_2.0.0             
## [105] digest_0.6.27             xtable_1.8-4             
## [107] munsell_0.5.0